Started: 2020-09-10
Last edited: 2020-09-16 20:00:32
library(tidyverse)
# single cell
library(Seurat)
# plotting
library(patchwork)
library(ggthemes)
Alice had performed bulk-RNA seq on the placenta sample from COVID-19 positive mothers and control placentas and has a list of genes that were differentially expressed. Here we will use the single-cell RNA seq data to figure out in which cell types those DE genes are expressed.
I have already annotated the clusters. The annotated data (a seurat object) was saved as .rds, which we have to read.
seur <- readRDS("../results/02_annotation/seurat-object_annotated.rds")
# set active assay
DefaultAssay(seur) <- "SCT"
# set levels for idents (use merged_annotations)
seur@meta.data$annotation_merged <- factor(seur@meta.data$annotation_merged,
levels = sort(unique(seur@meta.data$annotation_merged)))
# set idents
Idents(seur) <- seur@meta.data$annotation_merged
Below are the DE genes sent by Alice.
genes <- list()
genes$focused <- c("HSPA1A", "PPP1R11", "LY6GLY6C", "ITGAX", "IFITM1", "C1QC", "CCL2", "OAS3", "MX1")
genes$analysis1 <- c("HSPA1A", "FMC1-LUC7L2", "HSPA1B", "AC011511.4", "PPP1R11", "AL139300.1", "LY6GLY6C")
genes$analysis2 <- c("ITGAX", "OAS3", "IFITM1", "MX1", "C1QC", "MX2", "CCL2")
genes$additional <- c("C1QTNF2", "LYVE1", "TREM1", "FOLR2", "C1QB", "CCL2", "TNFRSF10C", "CXCL9", "IL1R2", "IL36A", "CD28", "OAS1", "IL1RN", "CD36", "CXCR2", "SERPING1", "CXCR1", "TNFRSF10C", "C1QA", "HCST", "IL36A", "IL4R", "LY96", "IL1R2", "CXCR2", "CXCL2", "S100A7", "IFITM3", "SELENOM", "SELENOP", "C3AR1", "CCL2", "CCL8")
genes$entry <- c("ACE2", "TMPRSS2", "BSG", "DPP4", "CTSL", "CTSB", "FURIN")
all.genes <- c(genes$focused, genes$analysis1, genes$analysis2, genes$additional, genes$entry) %>%
unique() %>%
sort()
# dotplot function
DotPlot2 <- function(object = seur, assay = "SCT", features, title = "", ...) {
p <- DotPlot(object,
assay = assay,
features = features,
dot.scale = 4,
dot.min = 0.01,
...) +
coord_flip() +
labs(caption = paste0("sctransform normalized expression"), title = title) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(size = 0.1),
legend.key.size = unit(0.75, "line"))
return(p)
}
DotPlot2(features = all.genes)
The following requested variables were not found: AC011511.4, AL139300.1, FMC1-LUC7L2, LY6GLY6C
cowplot::ggsave2(last_plot(),
filename = "../results/03_de-genes/plots/dotplot_all-genes.pdf",
width = 6.5, height = 8, units = "in")
# violin function
VlnPlot2 <- function(object = seur, assay = "SCT", feature, ...) {
VlnPlot(object = object,
features = feature,
assay = assay,
same.y.lims = FALSE,
split.by = "covid",
split.plot = TRUE,
pt.size = 0,
...) +
theme_minimal() +
theme(legend.position = "bottom",
panel.grid.minor = element_blank(),
panel.grid.major = element_line(size = 0.2),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
legend.key.size = unit(0.75, "lines"))
}
# umap function
FeaturePlot2 <- function(object = seur, feature, ...) {
DefaultAssay(object) <- "SCT"
FeaturePlot(object,
feature = feature,
split.by = "covid",
pt.size = 0.1,
order = TRUE,
min.cutoff = "q10",
combine = TRUE,
...) &
plot_annotation(title = feature) &
theme_bw() +
theme(panel.grid.major = element_line(size = 0.25),
panel.grid.minor = element_blank(),
legend.key.size = unit(0.75, "line"),
legend.text = element_text(size = 8, angle = 90, hjust = 1),
legend.position = "bottom",
axis.title = element_text(size = 8),
plot.title = element_text(size = 12),
axis.title.y.right = element_blank(),
axis.text = element_blank(),
aspect.ratio = 1)
}
for(i in all.genes[all.genes %in% rownames(seur)]) {
vplot <- VlnPlot2(feature = i)
fplot <- FeaturePlot2(feature = i, max.cutoff = "q99")
cowplot::ggsave2(vplot,
filename = paste0("../results/03_de-genes/plots/", i, "_violinplot.pdf"),
width = 6, height = 3.5, units = "in")
cowplot::ggsave2(fplot,
filename = paste0("../results/03_de-genes/plots/", i, "_on-umap.png"),
width = 5, height = 4, units = "in")
vf <- vplot + fplot +
plot_layout(guides = "collect", ncol = 1, widths = c(1, 1)) +
plot_annotation(caption = "sctransform normalized expression")
print(vf)
}
The default behaviour of split.by has changed.
Separate violin plots are now plotted side-by-side.
To restore the old behaviour of a single split violin,
set split.plot = TRUE.
This message will be shown once per session.
For some genes, the umap plots and violin plots seem to disagree with each other. That is, the gene is expressed at higher level in “covid” than “control” according to the violin plot, but the colours appear stronger for “control” on umap plots. I think this is simply because of the differing number of cells from “control” and “covid”. We have fewer cells from “covid” samples (as is evident also on the umap plots), so the lightness or sparseness of blue on umap plots just reflect that. Violin plots on the other hand scale the width of violins to the total number of cells in each group, so this difference is not seen in violin plots.
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Mojave 10.14.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggthemes_4.2.0 patchwork_1.0.1 Seurat_3.2.1 forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4
[8] readr_1.3.1 tidyr_1.1.2 tibble_3.0.3 ggplot2_3.3.2 tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] Rtsne_0.15 colorspace_1.4-1 deldir_0.1-28 ellipsis_0.3.1 ggridges_0.5.2
[6] fs_1.5.0 spatstat.data_1.4-3 rstudioapi_0.11 farver_2.0.3 leiden_0.3.3
[11] listenv_0.8.0 ggrepel_0.8.2 fansi_0.4.1 lubridate_1.7.9 xml2_1.3.2
[16] codetools_0.2-16 splines_4.0.2 knitr_1.29 polyclip_1.10-0 jsonlite_1.7.1
[21] broom_0.7.0 ica_1.0-2 cluster_2.1.0 dbplyr_1.4.4 png_0.1-7
[26] uwot_0.1.8 shiny_1.5.0 sctransform_0.2.1 compiler_4.0.2 httr_1.4.2
[31] backports_1.1.9 assertthat_0.2.1 Matrix_1.2-18 fastmap_1.0.1 lazyeval_0.2.2
[36] cli_2.0.2 later_1.1.0.1 htmltools_0.5.0 tools_4.0.2 rsvd_1.0.3
[41] igraph_1.2.5 gtable_0.3.0 glue_1.4.2 RANN_2.6.1 reshape2_1.4.4
[46] spatstat_1.64-1 Rcpp_1.0.5 cellranger_1.1.0 vctrs_0.3.4 nlme_3.1-149
[51] lmtest_0.9-38 xfun_0.16 globals_0.12.5 rvest_0.3.6 mime_0.9
[56] miniUI_0.1.1.1 lifecycle_0.2.0 irlba_2.3.3 goftest_1.2-2 future_1.18.0
[61] MASS_7.3-52 zoo_1.8-8 scales_1.1.1 spatstat.utils_1.17-0 hms_0.5.3
[66] promises_1.1.1 parallel_4.0.2 RColorBrewer_1.1-2 yaml_2.2.1 reticulate_1.16
[71] pbapply_1.4-3 gridExtra_2.3 rpart_4.1-15 stringi_1.5.3 rlang_0.4.7
[76] pkgconfig_2.0.3 matrixStats_0.56.0 lattice_0.20-41 tensor_1.5 ROCR_1.0-11
[81] labeling_0.3 htmlwidgets_1.5.1 cowplot_1.1.0 tidyselect_1.1.0 RcppAnnoy_0.0.16
[86] plyr_1.8.6 magrittr_1.5 R6_2.4.1 generics_0.0.2 DBI_1.1.0
[91] mgcv_1.8-31 pillar_1.4.6 haven_2.3.1 withr_2.2.0 fitdistrplus_1.1-1
[96] abind_1.4-5 survival_3.2-3 future.apply_1.6.0 modelr_0.1.8 crayon_1.3.4
[101] KernSmooth_2.23-17 plotly_4.9.2.1 grid_4.0.2 readxl_1.3.1 data.table_1.13.0
[106] blob_1.2.1 reprex_0.3.0 digest_0.6.25 xtable_1.8-4 httpuv_1.5.4
[111] munsell_0.5.0 viridisLite_0.3.0